# ============================================================================
# Script: Justifying European Border Policies — Dataset
# Author: Lisa Herbig
# Project: Justifying European Border Policies (JEPP)
# Date: 27 November 2025
# ============================================================================


# ------------------------------------------------------------------------------
# Libraries
# ------------------------------------------------------------------------------
library(readr)   
library(dplyr)   
library(tidyr)
library(stringr)

# ------------------------------------------------------------------------------
# Working directory and input data
# ------------------------------------------------------------------------------

# Work from the supplementary-materials data folder; the relative output path
# used when saving at the end of the script resolves against this directory.
output_dir <- "~/OneDrive - UvA/1. Justification Paper/JEPP_Submission/JustifyingEuropeanBorderPolicies_SupplementaryMaterials/data"
setwd(output_dir)

# Read the final coding sheet (CSV) into `data`
coding_csv <- "~/OneDrive - UvA/1. Justification Paper/Justification Paper - Analysis/Data/Datasets/Content Analysis/Final Coding/Justification_CODING FINAL-ClusterAnalysisReduced.csv"
data <- read_csv(coding_csv)

# ------------------------------------------------------------------------------
# Reshape Dataset
# ------------------------------------------------------------------------------

# Reshape the coding sheet so that every document contributes one row per
# policy slot (P1-P6) instead of carrying all policy variables side by side.

# Policy slots; these are the prefixes targeted by the "^P[1-6]" pattern below.
policy_ids <- paste0("P", 1:6)

# Document-level variables, repeated further down for every policy row
doc_vars <- data %>% dplyr::select(DOC_ID, CODER, DATE, DOC_TYPE)

# Stack the policy columns into long format, then split each column name into
# its policy prefix (`policy`) and the remainder of the name (`variable`).
# `policy` must be extracted before `variable` is overwritten.
# NOTE(review): starts_with("P") ignores case by default and also selects any
# other column whose name begins with "P"/"p"; such columns get policy = NA and
# their rows are dropped by the filter below. Confirm that no document-level
# column is affected.
data_long <- data %>%
  pivot_longer(
    cols = starts_with("P"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(
    policy = str_extract(variable, "^P[1-6]"),
    variable = str_replace(variable, "^P[1-6]", "")
  )

# Spread the variables back out (one row per document and policy) and drop the
# rows that carry no policy prefix.
data_wide <- data_long %>%
  pivot_wider(names_from = variable, values_from = value) %>%
  filter(!is.na(policy))

# Skeleton with one row per document and policy slot: every row of `doc_vars`
# is repeated once per slot and labelled P1..P6 in order.
row_index <- rep(seq_len(nrow(doc_vars)), each = length(policy_ids))
reshaped_data <- doc_vars[row_index, ]
reshaped_data$policy <- rep(policy_ids, times = nrow(doc_vars))

# Attach the policy-specific variables; the left join keeps every skeleton row
# even when `data_wide` has no matching row for it.
reshaped_data <- left_join(
  reshaped_data,
  data_wide,
  by = c("DOC_ID", "CODER", "DATE", "DOC_TYPE", "policy")
)

# Strip the underscore in front of the variable-group tags CTX, POL, DIR and T
# in a data frame's column names (e.g. "_CTX_1" becomes "CTX_1").
#
# Each tag is matched as the literal text "_TAG_" anywhere in a name, not only
# at its start, and the tags are processed in the order listed.
#
# df: data frame (or tibble) whose column names should be cleaned.
# Returns `df` with the updated names; the data themselves are untouched.
rename_columns_standard <- function(df) {
  col_names <- names(df)
  for (tag in c("CTX", "POL", "DIR", "T")) {
    col_names <- gsub(
      paste0("_", tag, "_"),
      paste0(tag, "_"),
      col_names,
      fixed = TRUE
    )
  }
  names(df) <- col_names
  df
}

# Standardize the column names, then move the document identifiers to the
# front while keeping all remaining columns in their current order.
reshaped_data <- reshaped_data %>%
  rename_columns_standard() %>%
  dplyr::select(DOC_ID, CODER, DATE, DOC_TYPE, everything())

# Preview the policy-level dataset
head(reshaped_data)


# TODO: write code to save the policy-specific dataset (`reshaped_data`).

# Next step: create one row per justification.
# `reshaped_data` is assumed to hold six rows per document (one per policy).

# Build one copy of the policy-level data per justification slot. Each copy
# keeps all shared columns plus the columns of its own slot and drops the
# columns that belong to the other two slots.

# Justification 1: drop the J2_* and J3_* columns
data_J1 <- reshaped_data %>%
  dplyr::select(-starts_with(c("J2_", "J3_")))

# Justification 2: drop the J1_* and J3_* columns
data_J2 <- reshaped_data %>%
  dplyr::select(-starts_with(c("J1_", "J3_")))

# Justification 3: drop the J1_* and J2_* columns
data_J3 <- reshaped_data %>%
  dplyr::select(-starts_with(c("J1_", "J2_")))

# Replace a slot-specific column prefix (e.g. "J1_") with the generic "J_" so
# that the per-justification datasets share one set of column names.
#
# df:            data frame (or tibble) whose column names should be changed.
# justification: slot label such as "J1"; it is inserted into a regular
#                expression anchored at the start of each column name.
# Returns `df` with the renamed columns; names without the prefix are kept.
rename_justification_vars <- function(df, justification) {
  prefix_pattern <- paste0("^", justification, "_")
  # The pattern is anchored, so at most one replacement per name is possible.
  names(df) <- sub(prefix_pattern, "J_", names(df))
  df
}

# For each justification slot: switch its columns to the shared "J_" prefix
# and record the slot of origin in `justification_id`.
data_J1 <- data_J1 %>%
  rename_justification_vars("J1") %>%
  mutate(justification_id = "J1")

data_J2 <- data_J2 %>%
  rename_justification_vars("J2") %>%
  mutate(justification_id = "J2")

data_J3 <- data_J3 %>%
  rename_justification_vars("J3") %>%
  mutate(justification_id = "J3")

# Stack the three subsets: J1 rows first, then J2, then J3
final_reshaped_data <- bind_rows(data_J1, data_J2, data_J3)

# Preview the justification-level dataset and list its columns
head(final_reshaped_data)
names(final_reshaped_data)

# ------------------------------------------------------------------------------
# Save
# ------------------------------------------------------------------------------

# Write the justification-level dataset as CSV without a row-name column.
# The path is relative, so the file is created in the current working directory.
output_file <- "final_reshaped_data.csv"
write.csv(x = final_reshaped_data, file = output_file, row.names = FALSE)
